home *** CD-ROM | disk | FTP | other *** search
/* Copyright (c) 1991 Sun Wu and Udi Manber. All Rights Reserved. */
/* substitute metachars with special symbols */
/* if regular expression, then set flag REGEX */
/* if REGEX and MULTIPAT then report error message, */
/* -w only for single word pattern. If WORDBOUND & MULTIWORD error */
/* process start-of-line and end-of-line symbols, */
/* process -w WORDBOUND option: append special symbol at begin & end of */
/* the pattern; process -d option before this routine */
/* the delimiter pattern is in D_pattern (needs to end with '; ') */
/* if '-t' (suggestion: how about -B) the pattern is passed to sgrep */
/* and doesn't go here */
/* in that case, -d is ignored? or not necessary */
/* upon return, Pattern contains the pattern to be processed by maskgen */
/* D_pattern contains transformed D_pattern */
-
#include <stdlib.h>
#include <string.h>

#include "agrep.h"
-
- extern int SIMPLEPATTERN, WHOLELINE, REGEX, RE_ERR, DELIMITER, TAIL, WORDBOUND;
- extern int HEAD;
- extern CHAR Progname[];
- extern int D_length;
- extern int table[WORD][WORD];
-
- preprocess(D_pattern, Pattern) /* need two parameters */
- CHAR *D_pattern, *Pattern;
- {
- CHAR temp[Maxline], *r_pat, *old_pat; /* r_pat for r.e. */
- CHAR old_D_pat[MaxDelimit];
- int i, j=0, rp=0, m, t=0, partitions, num_pos, ANDON = 0;
- int d_end ;
- int IN_RANGE=0, EVEN=0, OR_AND=0;
- old_pat = Pattern; /* to remember the starting position */
- m = strlen(Pattern);
- for(i=0; i< m; i++) {
- if(Pattern[i] == '\\') i++;
- else if(Pattern[i] == '|' || Pattern[i] == '*' ) REGEX = ON;
- }
- r_pat = (CHAR *) malloc(strlen(Pattern)+2*strlen(D_pattern));
- strcpy(temp, D_pattern);
- d_end = t = strlen(temp); /* size of D_pattern, including '; ' */
- if (WHOLELINE) { temp[t++] = LANGLE;
- temp[t++] = NNLINE;
- temp[t++] = RANGLE;
- temp[t] = '\0';
- strcat(temp, Pattern);
- m = strlen(temp);
- temp[m++] = LANGLE;
- temp[m++] = '\n';
- temp[m++] = RANGLE;
- temp[m] = '\0'; }
- else {
- if (WORDBOUND) { temp[t++] = LANGLE;
- temp[t++] = WORDB;
- temp[t++] = RANGLE;
- temp[t] = '\0'; }
- strcat(temp, Pattern);
- m = strlen(temp);
- if (WORDBOUND) { temp[m++] = LANGLE;
- temp[m++] = WORDB;
- temp[m++] = RANGLE; }
- temp[m] = '\0';
- }
- /* now temp contains augmented pattern , m it's size */
-
- D_length = 0;
- for (i=0, j=0; i< d_end-2; i++) {
- switch(temp[i])
- {
- case '\\' : i++;
- Pattern[j++] = temp[i];
- old_D_pat[D_length++] = temp[i];
- break;
- case '<' : Pattern[j++] = LANGLE;
- break;
- case '>' : Pattern[j++] = RANGLE;
- break;
- case '^' : Pattern[j++] = '\n';
- old_D_pat[D_length++] = temp[i];
- break;
- case '$' : Pattern[j++] = '\n';
- old_D_pat[D_length++] = temp[i];
- break;
- default : Pattern[j++] = temp[i];
- old_D_pat[D_length++] = temp[i];
- break;
- }
- }
- if(D_length > MAXDELIM) {
- fprintf(stderr, "%s: delimiter pattern too long\n", Progname);
- exit(2);
- }
- Pattern[j++] = ANDPAT;
- old_D_pat[D_length] = '\0';
- strcpy(D_pattern, old_D_pat);
- D_length++;
- /*
- Pattern[j++] = ' ';
- */
- Pattern[j] = '\0';
- rp = 0;
- if(REGEX) {
- r_pat[rp++] = '.'; /* if REGEX: always append '.' in front */
- r_pat[rp++] = '(';
- Pattern[j++] = NOCARE;
- HEAD = ON;
- }
- for (i=d_end; i < m ; i++)
- {
- switch(temp[i])
- {
- case '\\': i++; Pattern[j++] = temp[i];
- r_pat[rp++] = 'o'; /* the symbol doesn't matter */
- break;
- case '#': if(REGEX) {
- Pattern[j++] = NOCARE;
- r_pat[rp++] = '.';
- r_pat[rp++] = '*';
- break; }
- Pattern[j++] = WILDCD;
- break;
- case '(': Pattern[j++] = LPARENT;
- r_pat[rp++] = '(';
- break;
- case ')': Pattern[j++] = RPARENT;
- r_pat[rp++] = ')';
- break;
- case '[': Pattern[j++] = LRANGE;
- r_pat[rp++] = '[';
- IN_RANGE = ON;
- break;
- case ']': Pattern[j++] = RRANGE;
- r_pat[rp++] = ']';
- IN_RANGE = OFF;
- break;
- case '<': Pattern[j++] = LANGLE;
- break;
- case '>': Pattern[j++] = RANGLE;
- break;
- case '^': if (temp[i-1] == '[') Pattern[j++] = NOTSYM;
- else Pattern[j++] = '\n';
- r_pat[rp++] = '^';
- break;
- case '$': Pattern[j++] = '\n';
- r_pat[rp++] = '$';
- break;
- case '.': Pattern[j++] = NOCARE;
- r_pat[rp++] = '.';
- break;
- case '*': Pattern[j++] = STAR;
- r_pat[rp++] = '*';
- break;
- case '|': Pattern[j++] = ORSYM;
- r_pat[rp++] = '|';
- break;
- case ',': Pattern[j++] = ORPAT;
- RE_ERR = ON;
- break;
- case ';': if(ANDON) RE_ERR = ON;
- Pattern[j++] = ANDPAT;
- ANDON = ON;
- break;
- case '-': if(IN_RANGE) {
- Pattern[j++] = HYPHEN;
- r_pat[rp++] = '-';
- }
- else {
- Pattern[j++] = temp[i];
- r_pat[rp++] = temp[i];
- }
- break;
- case NNLINE :
- Pattern[j++] = temp[i];
- r_pat[rp++] = 'N';
- break;
- default: Pattern[j++] = temp[i];
- r_pat[rp++] = temp[i];
- break;
- }
- }
- if(REGEX) { /* append ').' at end of regular expression */
- r_pat[rp++] = ')';
- r_pat[rp++] = '.';
- Pattern[j++] = NOCARE;
- TAIL = ON;
- }
- Pattern[j] = '\0';
- m = j;
- r_pat[rp] = '\0';
- if(REGEX)
- {
- if(DELIMITER || WORDBOUND) {
- fprintf(stderr, "%s: -d or -w option is not supported for this pattern\n", Progname);
- exit(2);
- }
- if(RE_ERR) {
- fprintf(stderr, "%s: illegal regular expression\n", Progname);
- exit(2);
- }
- while(*Pattern != NOCARE && m-- > 0) Pattern++; /* poit to . */
- num_pos = init(r_pat, table);
- if(num_pos <= 0) {
- fprintf(stderr, "%s: illegal regular expression\n", Progname);
- exit(2);
- }
- if(num_pos > 30) {
- fprintf(stderr, "%s: regular expression too long\n", Progname);
- exit(2);
- }
- strcpy(old_pat, Pattern); /* do real change to the Pattern to be returned */
- return;
- } /* if regex */
-
- return;
- }
-
-